#爬取永康房产网出售房源信息
import re
from datetime import datetime

import requests
import xlwt
from lxml import etree
from xlwt import Workbook


# First listing page; later pages live at '<this url>page<N>.html'.
_SALE_INDEX_URL = 'http://www.ykfcw.net/sale/'

# Prefix prepended to the site-relative listing links.
_SITE_ROOT = 'http://www.ykfcw.net'

# NOTE(review): the two User-Agent literals are joined without a space
# ('...Gecko)Chrome/...'); kept byte-for-byte as the original sent it.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
                  'Chrome/69.0.3497.92 Safari/537.36',
    'Referer': 'http://www.ykfcw.net/',
}

# Seconds to wait for the server before giving up on a request, so a stalled
# connection raises instead of hanging the script forever.
_REQUEST_TIMEOUT = 30

# Extracts N from pagination links of the form '.../pageN.html'.
_PAGE_NUMBER_RE = re.compile(r'page(\d+)')

# Worksheet header labels, in column order.
_SALE_COLUMNS = (
    '地段', '房源名称', '样式', '楼层', '厅室描述', '装修情况',
    '面积', '单位面积报价', '总价', '发布日期', '房源链接',
)


def _parse_last_page(hrefs):
    """Return the page count encoded in the first href of *hrefs*.

    Falls back to 1 when *hrefs* is empty or the link carries no
    ``page<N>`` component, i.e. when only the index page is known.
    """
    if not hrefs:
        return 1
    match = _PAGE_NUMBER_RE.search(hrefs[0])
    if match is None:
        return 1
    return max(int(match.group(1)), 1)


def _fetch_tree(url):
    """Download *url* and return the response text parsed by ``etree.HTML``."""
    response = requests.get(url, headers=_REQUEST_HEADERS,
                            timeout=_REQUEST_TIMEOUT)
    return etree.HTML(response.text)


def _cell_text(listing, li_index):
    """Return the link text inside the *li_index*-th ``<li>`` ('' if none)."""
    return ''.join(listing.xpath('li[%d]/a/text()' % li_index))


def _extract_rows(tree):
    """Return one list of 11 cell values per listing found in *tree*.

    Cells are read row by row (one ``<ul class="fylist">`` at a time) so a
    listing with a missing field yields an empty cell instead of shifting
    the values of every following listing.
    """
    rows = []
    for listing in tree.xpath('//ul[@class="fylist"]'):
        # The original sized its loop by the name column (li[3]); rows
        # without a name link are therefore skipped here as well.
        if not _cell_text(listing, 3):
            continue
        # li[2]..li[11] map to the first ten worksheet columns.
        cells = [_cell_text(listing, li_index) for li_index in range(2, 12)]
        hrefs = listing.xpath('li[2]/a/@href')
        cells.append(_SITE_ROOT + hrefs[0] if hrefs else '')
        rows.append(cells)
    return rows


def _write_sheet(sheet, rows):
    """Write the header row followed by *rows* into *sheet*."""
    for col, label in enumerate(_SALE_COLUMNS):
        sheet.write(0, col, label)
    for row_index, cells in enumerate(rows, start=1):
        for col, value in enumerate(cells):
            sheet.write(row_index, col, value)


def chushou():
    """Scrape every for-sale listing page into a dated workbook.

    Fetches the listing index, reads the page count from the last
    pagination link, then writes one worksheet per page (named
    '第<N>页'). The workbook is saved after each page so finished pages
    are on disk even if a later request fails.

    Raises:
        Whatever ``requests``, ``lxml`` or ``xlwt`` raise on network,
        parse or write failures; nothing is caught here.
    """
    book = Workbook(encoding='utf-8')
    # Computed once so every save of this run targets the same file.
    # NOTE(review): xlwt writes the legacy .xls format; the '.xlsx' suffix is
    # kept so the output filename does not change - confirm before renaming.
    title = datetime.now().strftime('%Y-%m-%d') + '房产出售信息(房产网).xlsx'

    tree = _fetch_tree(_SALE_INDEX_URL)
    # Parsed with a regex rather than eval() on scraped text, and without
    # assuming the page number is exactly two digits long.
    pagenum = _parse_last_page(
        tree.xpath('//div[@id="pagelist"]/a[last()]/@href'))
    print('最大页数', pagenum)

    for p in range(1, pagenum + 1):
        if p > 1:
            # Page 1 is the index page that was already downloaded above.
            tree = _fetch_tree(_SALE_INDEX_URL + 'page' + str(p) + '.html')
        sheet = book.add_sheet('第' + str(p) + '页')
        _write_sheet(sheet, _extract_rows(tree))
        print('已保存第', p, '页')
        book.save(title)

def heikeji(max_retries=None):
    """Run ``chushou()`` and start it over whenever it raises.

    Args:
        max_retries: Maximum number of restarts after a failure. ``None``
            (the default) retries indefinitely.

    Raises:
        Exception: The last error from ``chushou()`` once *max_retries*
            restarts have been used up. ``KeyboardInterrupt`` and
            ``SystemExit`` are never caught, so Ctrl+C stops the script.
    """
    failures = 0
    # A loop instead of self-recursion: repeated failures can no longer
    # exhaust the interpreter's recursion limit.
    while True:
        try:
            chushou()
        except Exception as exc:
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            print('这倒霉网站又响应超时，重新开始')
            # Show the real cause; not every failure is a timeout.
            print(repr(exc))
        else:
            return
        
def main():
    """Run the scraper, report completion and wait for Enter before exiting."""
    heikeji()
    print('保存完毕')
    # Keeps the console window open until the user presses Enter.
    input('回车退出')


main()